Network Models and Information Theory

Erin M. Buchanan

01/08/2020

Language Topics Discussed

Cognitive Science

Network terminology

Example Network

Network Statistics

Network Statistics

Network Statistics

Let’s Try Examples

#r chunk
#install.packages(c("memnet", "jsonlite", "dplyr", "tidytext", "widyr", "ggplot2", "igraph", "ggraph", "tidyr"))
library(memnet)
## Loading required package: Rcpp
library(jsonlite)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidytext)
library(widyr)
library(ggplot2)
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(ggraph)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
## 
##     crossing

Verbal Fluency

Example Data

##r chunk
data("animal_fluency")
head(animal_fluency)
## $`70`
##  [1] "cow"       "chicken"   "horse"     "pig"       "rabbit"    "fox"      
##  [7] "wolf"      "hyena"     "lion"      "tiger"     "panther"   "bear"     
## [13] "elk"       "moose"     "reindeer"  "chimp"     "pony"      "armadillo"
## [19] "skunk"     "bear"      "raccoon"   "whale"     "dolphin"   "turtle"   
## [25] "frog"      "snake"    
## 
## $`52`
##  [1] "cat"      "dog"      "parakeet" "leopard"  "gorilla"  "monkey"  
##  [7] "chicken"  "rooster"  "duck"     "donkey"   "horse"    "cow"     
## [13] "snake"    "frog"     "gerbil"   "hamster"  "squirrel" "rabbit"  
## [19] "deer"     "moose"    "elk"      "buffalo" 
## 
## $`49`
##  [1] "chicken"  "snake"    "horse"    "cow"      "pig"      "turkey"  
##  [7] "dog"      "cat"      "frog"     NA         "rabbit"   NA        
## [13] "gopher"   "weasel"   NA         NA         "opossum"  "donkey"  
## [19] "elephant" "giraffe"  "lion"     "tiger"    "hyena"    "deer"    
## [25] "antelope" "bear"     "fish"     "coyote"   "wolf"     "fox"     
## [31] "eagle"    "robin"    "sparrow" 
## 
## $`53`
##  [1] "cat"          "dog"          "rat"          "mouse"        "rabbit"      
##  [6] "turtle"       "fish"         "alligator"    "bear"         "crocodile"   
## [11] "elephant"     "giraffe"      "monkey"       "human"        "lion"        
## [16] "tiger"        "zebra"        "hippopotamus" "orangutan"   
## 
## $`57`
##  [1] "dog"          "cat"          "lizard"       "bird"         "whale"       
##  [6] "fish"         "dolphin"      "penguin"      "gerbil"       "hamster"     
## [11] "rat"          "armadillo"    "alligator"    "koala"        "lion"        
## [16] "tiger"        "monkey"       "giraffe"      "elephant"     "hippopotamus"
## [21] "kangaroo"     "seal"        
## 
## $`39`
##  [1] "giraffe"   "elephant"  "zebra"     "kangaroo"  "bear"      "alligator"
##  [7] "beaver"    "dog"       "cat"       "rabbit"    "cow"       "goat"     
## [13] "chicken"   "sheep"     "llama"     "pig"       "turtle"    "bison"    
## [19] "camel"     "lion"      "tiger"

Example Data

##r chunk
#get the ages which are the list names
age <- as.numeric(names(animal_fluency))

Network Types in memnet

Older Networks

##r chunk
# infer networks for age > 70
net_comunity <- community_graph(animal_fluency[age > 70])
net_threshold <- threshold_graph(animal_fluency[age > 70])
net_rw <- rw_graph(animal_fluency[age > 70])

Younger Networks

##r chunk
net_comunity2 <- community_graph(animal_fluency[age <= 60])
net_threshold2 <- threshold_graph(animal_fluency[age <= 60])
net_rw2 <- rw_graph(animal_fluency[age <= 60])

Plots - Community

##r chunk - cex controls size of text
network_plot(edg_to_adj(net_comunity), nod_cex = 2, lab_cex = 1)

network_plot(edg_to_adj(net_comunity2), nod_cex = 2, lab_cex = 1)

Plots - Threshold

##r chunk - cex controls size of text
network_plot(edg_to_adj(net_threshold), nod_cex = 2, lab_cex = 1)

network_plot(edg_to_adj(net_threshold2), nod_cex = 2, lab_cex = 1)

Plots - Random Walk

##r chunk - cex controls size of text
network_plot(edg_to_adj(net_rw), nod_cex = 2, lab_cex = .5, lab_lwd = 1, lab_grid_size = 70)

network_plot(edg_to_adj(net_rw2), nod_cex = 2, lab_cex = .5, lab_lwd = 1, lab_grid_size = 70)

Network Statistics

V = number of nodes E = number of edges K = degree C = Clustering coefficient L = shortest path S = Small world A = assortivity

##r chunk - can do other networks in the same way to compare
network_stats(edg_to_adj(net_comunity))
##         |V|         |E|           k           C          Cc           L 
##  91.0000000 170.0000000   1.8681319   0.2890767  14.0814358   3.6721981 
##          Lc           S           A           p 
##   4.1809481   3.3680006  -0.2343542   0.9780220
network_stats(edg_to_adj(net_comunity2))
##         |V|         |E|           k           C          Cc           L 
##  91.0000000 185.0000000   2.0329670   0.3829349  17.1409963   3.5095737 
##          Lc           S           A           p 
##   4.0615814   4.2202764  -0.1800739   0.9780220

Something else fun

##r chunk
#k is the number of hops
neighborhood_plot(edg_to_adj(net_comunity), k = 3, node = 'cat', nod_cex = 2, lab_cex = 1)

neighborhood_plot(edg_to_adj(net_comunity2), k = 3, node = 'cat', nod_cex = 2, lab_cex = 1)

Example: NASA’s metadata

What is JSON?

NASA JSON

##r chunk
metadata <- fromJSON("nasa_data.JSON")
names(metadata$dataset)
##  [1] "accessLevel"                 "landingPage"                
##  [3] "bureauCode"                  "issued"                     
##  [5] "@type"                       "modified"                   
##  [7] "references"                  "keyword"                    
##  [9] "contactPoint"                "publisher"                  
## [11] "identifier"                  "description"                
## [13] "title"                       "programCode"                
## [15] "distribution"                "license"                    
## [17] "accrualPeriodicity"          "theme"                      
## [19] "citation"                    "temporal"                   
## [21] "graphic-preview-description" "graphic-preview-file"       
## [23] "spatial"                     "language"                   
## [25] "release-place"               "creator"                    
## [27] "data-presentation-form"      "series-name"                
## [29] "issue-identification"        "describedBy"                
## [31] "editor"                      "describedByType"            
## [33] "dataQuality"                 "rights"

What data to use?

##r chunk
head(metadata$dataset$keyword) #it's a list!
## [[1]]
## [1] "international rosetta mission" "earth"                        
## [3] "unknown"                      
## 
## [[2]]
## [1] "jet propulsion laboratory" "completed"                
## 
## [[3]]
## [1] "goddard space flight center" "completed"                  
## 
## [[4]]
##  [1] "ngda"                           "soils"                         
##  [3] "biosphere"                      "ecological dynamics"           
##  [5] "earth science"                  "vegetation"                    
##  [7] "atmospheric water vapor"        "atmospheric radiation"         
##  [9] "precipitation"                  "ecosystems"                    
## [11] "atmospheric temperature"        "atmosphere"                    
## [13] "land surface"                   "national geospatial data asset"
## 
## [[5]]
## [1] "national geospatial data asset" "ngda"                          
## 
## [[6]]
## [1] "near earth asteroid rendezvous" "eros"

What data to use?

##r chunk
head(metadata$dataset$title)
## [1] "ROSETTA-ORBITER EARTH RPCMAG 2 EAR2 RAW V3.0"                            
## [2] "Sealed Planetary Return Canister (SPRC), Phase II"                       
## [3] "Enhanced ORCA and CLARREO Depolarizers Using AR Microstructures, Phase I"
## [4] "LINKAGES: An Individual-based Forest Ecosystem Biogeochemistry Model"    
## [5] "ERBE_S4G_MFOV_NF"                                                        
## [6] "NEAR EROS RADIO SCIENCE DATA SET - EROS/ORBIT V1.0"
head(metadata$dataset$description)
## [1] "This dataset contains EDITED RAW DATA of the second Earth Flyby (EAR2). The closest approach (CA) took place on November 13, 2007 at 20:57"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
## [2] "Sample return missions have primary importance in future planetary missions. A basic requirement is that samples be returned in pristine, uncontaminated condition, necessitating development of a canister system capable of maintaining cleanliness and seal integrity through a variety of environments. Further development of the Sealed Planetary Return Canister (SPRC), is proposed after a successful Phase 1 program. Besides providing a high integrity seal, the canister incorporates features for robotic manipulation and to allow the sample to be accessed in a controlled manner upon return to Earth. The SPRC seal system addresses the two most significant concerns for planetary samples  seal surfaces contaminated by the sample and high pressure due to the phase change of volatiles. The SPRC incorporates a novel sealing system evolved from the only marginally successful Apollo indium knife edge seal approach but with added features to address the difficulties and inconsistencies observed. The indium is contained within a protective barrier to prevent against contamination, and the knife edge is mechanically cleaned during the sealing process. The container body can be configured to accommodate a variety of samples including rock cores, rock fragments, regolith, dust, and frozen soil. Atmospheric samples can also be preserved. The design is readily scalable and adaptable to specific missions. The prototype developed in Phase 1 demonstrated a leakage rate of less than 1e-6 cc-atm/s, meeting the primary science requirement."                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
## [3] "Next generation Earth Science Satellites ORCA and CLARREO are designed to measure our planet's ocean and climate health.  Using hyper-spectral imaging at wavelengths ranging from the UV through NIR, these instruments will record the levels of the earth's temperature rise over the course of a decade.  To make such detailed measurements, polarization effects at various wavelengths due to multiple factors must be eliminated using an optical device known as a \"de-polarizer\".  For the CLARREO de-polarizer, four quartz windows are needed to randomize the polarization state of the observed reflected light spectrum.  Multiple reflections from 4 surfaces produce losses up to 14% of the incident light, a level high enough to produce \"ghost\" effects superimposed on the desired earth images resulting in reduced image contrast and greater measurement error.  An anti-reflection (AR) treatment is needed that can withstand the radiation and temperature effects caused by the mission environment while reducing reflection losses to levels of fractions of one percent.  A new type of AR treatment, being developed for many military and commercial applications, is based on surface relief microstructures fabricated directly in a window, optic, or sensor material.  AR microstructures (ARMs) can suppress internal reflections to levels unattainable by conventional thin-film AR coating technology.  To extend the performance benefits of ARMs to hyper-spectral imaging systems, it is proposed that the fabrication processes developed for fused silica, glass, silicon, and many other optical materials be adapted for use with the quartz and magnesium fluoride depolarizers planned for the ORCA and CLARREO missions.  In addition, an investigation of innovative surface microstructure technology is proposed for the fabrication of a new type of non-scattering, micro-textured depolarizer with inherent AR properties that can be applied to multiple optical elements within a spectrometer system."
## [4] "This model product contains the source codes for version 1 of the individual-based forest ecosystem biogeochemistry model LINKAGES and two subsequent versions as well as example input and output data. LINKAGES predicts long-term structure and dynamics of forest ecosystems as constrained by nitrogen availability, climate, and soil moisture. Model simulations compare favorably to field data from different geographic areas worldwide. LINKAGES, written in FORTRAN and provided in ASCII format, simulates birth, growth, and death of all trees greater than 1.43-cm dbh. Litter fall and decomposition are also simulated. Sunlight is the driving variable. Growing season degree days, soil water availability, and AET are calculated from precipitation, temperature, soil field moisture capacity, and wilting point. Decomposition and soil N availability are calculated from organic matter quantity and carbon chemistry, evapotranspiration, and degree of canopy closure. Light availability to each tree is a function of leaf biomass of taller trees. Degree days and availabilities of light and water constrain species reproduction. These variables plus soil N constrain tree growth and carbon accumulation in biomass. Tree death probability increases with age and slow growth. Leaf, root, and woody litter are returned to the soil at the end of each year to decay the following year. Climatic and forest data for eastern North America and New South Wales are provided as example model inputs. Modelers may use their own site data within any version of LINKAGES. Example model output is also provided."                                                                                                                                                                                                                                                                                                                                                                                                             
## [5] "Earth Radiation Budget Experiment (ERBE) S-4G Nonscanner, Medium Field of View (MFOV) Numerical Filter (NF) 5 degree Regional Averages in Hierarchical Data Format"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
## [6] "The NEAR Eros Radio Science Data Set is a time-ordered collection of raw and partially processed data collected during the NEAR orbital mapping of the asteroid 433 Eros."

Cleaning up the data

##r chunk
nasa_title <- tibble(id = metadata$dataset$identifier,
                         title = metadata$dataset$title)
head(nasa_title)
## # A tibble: 6 x 2
##   id                                      title                                 
##   <chr>                                   <chr>                                 
## 1 urn:nasa:pds:context_pds3:data_set:dat… ROSETTA-ORBITER EARTH RPCMAG 2 EAR2 R…
## 2 TECHPORT_9532                           Sealed Planetary Return Canister (SPR…
## 3 TECHPORT_9174                           Enhanced ORCA and CLARREO Depolarizer…
## 4 C179124965-ORNL_DAAC                    LINKAGES: An Individual-based Forest …
## 5 C1000000743-LARC_ASDC                   ERBE_S4G_MFOV_NF                      
## 6 urn:nasa:pds:context_pds3:data_set:dat… NEAR EROS RADIO SCIENCE DATA SET - ER…

Cleaning up the data

##r chunk
nasa_desc <- tibble(id = metadata$dataset$identifier,
                        desc = metadata$dataset$description)
head(nasa_desc)
## # A tibble: 6 x 2
##   id                                     desc                                   
##   <chr>                                  <chr>                                  
## 1 urn:nasa:pds:context_pds3:data_set:da… "This dataset contains EDITED RAW DATA…
## 2 TECHPORT_9532                          "Sample return missions have primary i…
## 3 TECHPORT_9174                          "Next generation Earth Science Satelli…
## 4 C179124965-ORNL_DAAC                   "This model product contains the sourc…
## 5 C1000000743-LARC_ASDC                  "Earth Radiation Budget Experiment (ER…
## 6 urn:nasa:pds:context_pds3:data_set:da… "The NEAR Eros Radio Science Data Set …

Cleaning up the data

##r chunk
nasa_keyword <- tibble(id = metadata$dataset$identifier,
                           keyword = metadata$dataset$keyword) %>% 
  unnest(keyword)

head(nasa_keyword)
## # A tibble: 6 x 2
##   id                                                     keyword                
##   <chr>                                                  <chr>                  
## 1 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcm… international rosetta …
## 2 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcm… earth                  
## 3 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcm… unknown                
## 4 TECHPORT_9532                                          jet propulsion laborat…
## 5 TECHPORT_9532                                          completed              
## 6 TECHPORT_9174                                          goddard space flight c…

Why separate?

##r chunk
nrow(nasa_title)
## [1] 24349
nrow(nasa_desc)
## [1] 24349
nrow(nasa_keyword)
## [1] 101818

Stop Words and Bag of Words

##r chunk
nasa_title <- nasa_title %>% 
  unnest_tokens(word, title) %>% 
  anti_join(stop_words)
## Joining, by = "word"
nasa_desc <- nasa_desc %>% 
  unnest_tokens(word, desc) %>% 
  anti_join(stop_words)
## Joining, by = "word"
head(nasa_title)
## # A tibble: 6 x 2
##   id                                                                      word  
##   <chr>                                                                   <chr> 
## 1 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 roset…
## 2 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 orbit…
## 3 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 earth 
## 4 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 rpcmag
## 5 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 2     
## 6 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 ear2
head(nasa_desc)
## # A tibble: 6 x 2
##   id                                                                      word  
##   <chr>                                                                   <chr> 
## 1 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 datas…
## 2 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 edited
## 3 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 raw   
## 4 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 data  
## 5 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 earth 
## 6 urn:nasa:pds:context_pds3:data_set:data_set.ro-e-rpcmag-2-ear2-raw-v3.0 flyby

Number of Words

##r chunk
nrow(nasa_title)
## [1] 198649
nrow(nasa_desc)
## [1] 2273737
nrow(nasa_keyword)
## [1] 101818

Simple statistics

##r chunk
nasa_title %>% 
  count(word, sort = TRUE)
## # A tibble: 16,088 x 2
##    word       n
##    <chr>  <int>
##  1 phase   8658
##  2 data    2957
##  3 ii      2588
##  4 v1.0    1705
##  5 1       1577
##  6 system  1436
##  7 ges     1212
##  8 disc    1211
##  9 global  1126
## 10 2       1101
## # … with 16,078 more rows

Simple statistics

##r chunk
nasa_desc %>% 
  count(word, sort = TRUE)
## # A tibble: 54,103 x 2
##    word       n
##    <chr>  <int>
##  1 data   35586
##  2 system 16519
##  3 phase  11935
##  4 space   9859
##  5 nasa    8845
##  6 based   8835
##  7 2       8469
##  8 1       7969
##  9 design  7604
## 10 time    7586
## # … with 54,093 more rows

Simple statistics

##r chunk
nasa_keyword %>% 
  group_by(keyword) %>% 
  count(sort = TRUE)
## # A tibble: 6,860 x 2
## # Groups:   keyword [6,860]
##    keyword                            n
##    <chr>                          <int>
##  1 completed                       9021
##  2 national geospatial data asset  7262
##  3 ngda                            7262
##  4 earth science                   6948
##  5 active                          2885
##  6 atmosphere                      2734
##  7 land surface                    1673
##  8 goddard space flight center     1537
##  9 glenn research center           1535
## 10 langley research center         1390
## # … with 6,850 more rows

What have we learned?

Exclude numbers

##r chunk
nasa_title <-  nasa_title[-grep("[0-9]", nasa_title$word), ] 
nasa_title %>% 
  count(word, sort = TRUE)
## # A tibble: 12,665 x 2
##    word        n
##    <chr>   <int>
##  1 phase    8658
##  2 data     2957
##  3 ii       2588
##  4 system   1436
##  5 ges      1212
##  6 disc     1211
##  7 global   1126
##  8 space    1019
##  9 based     973
## 10 version   861
## # … with 12,655 more rows
nasa_desc <-  nasa_desc[-grep("[0-9]", nasa_desc$word), ] 
nasa_desc %>% 
  count(word, sort = TRUE)
## # A tibble: 44,792 x 2
##    word        n
##    <chr>   <int>
##  1 data    35586
##  2 system  16519
##  3 phase   11935
##  4 space    9859
##  5 nasa     8845
##  6 based    8835
##  7 design   7604
##  8 time     7586
##  9 systems  7486
## 10 surface  7456
## # … with 44,782 more rows

Collocates!

##r chunk
title_word_pairs <- nasa_title %>% 
  pairwise_count(word, id, sort = TRUE, upper = FALSE)

head(title_word_pairs)
## # A tibble: 6 x 3
##   item1 item2      n
##   <chr> <chr>  <dbl>
## 1 phase ii      2498
## 2 ges   disc    1211
## 3 phase system   948
## 4 phase space    637
## 5 phase based    601
## 6 ges   degree   589

Collocates!

##r chunk
desc_word_pairs <- nasa_desc %>% 
  pairwise_count(word, id, sort = TRUE, upper = FALSE)

head(desc_word_pairs)
## # A tibble: 6 x 3
##   item1  item2      n
##   <chr>  <chr>  <dbl>
## 1 data   set     4139
## 2 data   system  3305
## 3 data   time    3144
## 4 phase  ii      3063
## 5 data   nasa    2730
## 6 system nasa    2728

Collocates!

##r chunk
keyword_pairs <- nasa_keyword %>% 
  pairwise_count(keyword, id, sort = TRUE, upper = FALSE)

head(keyword_pairs)
## # A tibble: 6 x 3
##   item1         item2                              n
##   <chr>         <chr>                          <dbl>
## 1 ngda          national geospatial data asset  7262
## 2 ngda          earth science                   6857
## 3 earth science national geospatial data asset  6857
## 4 earth science atmosphere                      2731
## 5 ngda          atmosphere                      2721
## 6 atmosphere    national geospatial data asset  2721

Make a network plot

##r chunk
set.seed(52550)
title_word_pairs %>%
  filter(n >= 250) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") + #use ?ggraph to see all the options
  geom_edge_link(aes(edge_alpha = n, edge_width = n), edge_colour = "purple") +
  geom_node_point(size = 5) +
  geom_node_text(aes(label = name), repel = TRUE, 
                 point.padding = unit(0.2, "lines")) +
  theme_void()

Make a network plot

##r chunk
desc_word_pairs %>%
  filter(n >= 2500) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") + #use ?ggraph to see all the options
  geom_edge_link(aes(edge_alpha = n, edge_width = n), edge_colour = "purple") +
  geom_node_point(size = 5) +
  geom_node_text(aes(label = name), repel = TRUE, 
                 point.padding = unit(0.2, "lines")) +
  theme_void()

Make a network plot

##r chunk
keyword_pairs %>%
  filter(n >= 1000) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") + #use ?ggraph to see all the options
  geom_edge_link(aes(edge_alpha = n, edge_width = n), edge_colour = "purple") +
  geom_node_point(size = 5) +
  geom_node_text(aes(label = name), repel = TRUE, 
                 point.padding = unit(0.2, "lines")) +
  theme_void()

Strongest pairs

##r chunk
keyword_cors <- nasa_keyword %>% 
  group_by(keyword) %>%
  filter(n() >= 50) %>%
  pairwise_cor(keyword, id, sort = TRUE, upper = FALSE)

head(keyword_cors)
## # A tibble: 6 x 3
##   item1      item2                          correlation
##   <chr>      <chr>                                <dbl>
## 1 ngda       national geospatial data asset       1    
## 2 dashlink   ames                                 1    
## 3 knowledge  sharing                              1.00 
## 4 schedule   expedition                           1.00 
## 5 turbulence models                               0.997
## 6 appel      knowledge                            0.997

Visualize correlated pairs

##r chunk
keyword_cors %>%
  filter(correlation > .6) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation, edge_width = correlation), edge_colour = "purple") +
  geom_node_point(size = 5) +
  geom_node_text(aes(label = name), repel = TRUE,
                 point.padding = unit(0.2, "lines")) +
  theme_void()

What have you learned?